"""
Tests for ArxivDownloader.
"""

import pytest

from src.local_deep_research.research_library.downloaders.arxiv import (
    ArxivDownloader,
)
from src.local_deep_research.research_library.downloaders.base import (
    ContentType,
)


class TestArxivCanHandle:
    """Tests for ArxivDownloader.can_handle()."""

    @pytest.fixture
    def downloader(self):
        return ArxivDownloader(timeout=30)

    def test_can_handle_arxiv_org_abs(self, downloader):
        """Recognizes arxiv.org/abs URLs."""
        assert downloader.can_handle("https://arxiv.org/abs/2301.12345") is True

    def test_can_handle_arxiv_org_pdf(self, downloader):
        """Recognizes arxiv.org/pdf URLs."""
        assert (
            downloader.can_handle("https://arxiv.org/pdf/2301.12345.pdf")
            is True
        )

    def test_can_handle_export_subdomain(self, downloader):
        """Recognizes export.arxiv.org URLs."""
        assert (
            downloader.can_handle("https://export.arxiv.org/abs/2301.12345")
            is True
        )

    def test_can_handle_with_version(self, downloader):
        """Recognizes URLs with version suffix."""
        assert (
            downloader.can_handle("https://arxiv.org/abs/2301.12345v2") is True
        )

    def test_can_handle_old_format(self, downloader):
        """Recognizes old format arXiv IDs."""
        assert (
            downloader.can_handle("https://arxiv.org/abs/cond-mat/0501234")
            is True
        )

    def test_cannot_handle_pubmed(self, downloader):
        """Returns False for PubMed URLs."""
        assert (
            downloader.can_handle("https://pubmed.ncbi.nlm.nih.gov/12345678")
            is False
        )

    def test_cannot_handle_generic(self, downloader):
        """Returns False for generic URLs."""
        assert downloader.can_handle("https://example.com/paper.pdf") is False

    def test_cannot_handle_empty_url(self, downloader):
        """Returns False for empty URL."""
        assert downloader.can_handle("") is False

    def test_cannot_handle_invalid_url(self, downloader):
        """Returns False for invalid URL."""
        assert downloader.can_handle("not a url") is False


class TestArxivIdExtraction:
    """Tests for arXiv ID extraction."""

    @pytest.fixture
    def downloader(self):
        return ArxivDownloader(timeout=30)

    def test_extract_new_format(self, downloader):
        """Extracts 2301.12345 format."""
        url = "https://arxiv.org/abs/2301.12345"
        assert downloader._extract_arxiv_id(url) == "2301.12345"

    def test_extract_new_format_with_version(self, downloader):
        """Strips version from new format ID."""
        url = "https://arxiv.org/abs/2301.12345v2"
        assert downloader._extract_arxiv_id(url) == "2301.12345"

    def test_extract_old_format(self, downloader):
        """Extracts cond-mat/0501234 format."""
        url = "https://arxiv.org/abs/cond-mat/0501234"
        assert downloader._extract_arxiv_id(url) == "cond-mat/0501234"

    def test_extract_old_format_with_version(self, downloader):
        """Strips version from old format ID."""
        url = "https://arxiv.org/abs/cond-mat/0501234v1"
        assert downloader._extract_arxiv_id(url) == "cond-mat/0501234"

    def test_extract_from_pdf_url(self, downloader):
        """Extracts from PDF URL."""
        url = "https://arxiv.org/pdf/2301.12345.pdf"
        assert downloader._extract_arxiv_id(url) == "2301.12345"

    def test_extract_from_invalid_url(self, downloader):
        """Returns None for invalid URL."""
        url = "https://example.com/paper.pdf"
        assert downloader._extract_arxiv_id(url) is None


class TestArxivDownload:
    """Tests for arXiv download functionality."""

    @pytest.fixture
    def downloader(self):
        return ArxivDownloader(timeout=30)

    def test_download_pdf_success(self, downloader, mocker, mock_pdf_content):
        """Successfully downloads PDF."""
        mock_response = mocker.Mock()
        mock_response.status_code = 200
        mock_response.content = mock_pdf_content
        mock_response.headers = {"content-type": "application/pdf"}

        mocker.patch.object(
            downloader.session, "get", return_value=mock_response
        )
        mocker.patch.object(
            downloader.rate_tracker, "apply_rate_limit", return_value=0
        )
        mocker.patch.object(downloader.rate_tracker, "record_outcome")

        content = downloader.download(
            "https://arxiv.org/abs/2301.12345", ContentType.PDF
        )
        assert content is not None
        assert content == mock_pdf_content

    def test_download_constructs_correct_url(
        self, downloader, mocker, mock_pdf_content
    ):
        """Constructs correct PDF URL from abstract URL."""
        mock_response = mocker.Mock()
        mock_response.status_code = 200
        mock_response.content = mock_pdf_content
        mock_response.headers = {"content-type": "application/pdf"}

        mock_get = mocker.patch.object(
            downloader.session, "get", return_value=mock_response
        )
        mocker.patch.object(
            downloader.rate_tracker, "apply_rate_limit", return_value=0
        )
        mocker.patch.object(downloader.rate_tracker, "record_outcome")

        downloader.download("https://arxiv.org/abs/2301.12345", ContentType.PDF)

        # Verify the PDF URL was called
        call_args = mock_get.call_args
        assert "https://arxiv.org/pdf/2301.12345.pdf" in str(call_args)

    def test_download_invalid_url_returns_none(self, downloader):
        """Returns None for invalid arXiv URL."""
        content = downloader.download(
            "https://example.com/paper.pdf", ContentType.PDF
        )
        assert content is None


class TestArxivDownloadWithResult:
    """Tests for download_with_result method."""

    @pytest.fixture
    def downloader(self):
        return ArxivDownloader(timeout=30)

    def test_download_with_result_invalid_url(self, downloader):
        """Returns skip reason for invalid URL."""
        result = downloader.download_with_result(
            "https://example.com/not-arxiv"
        )
        assert result.is_success is False
        assert result.skip_reason is not None
        assert "could not extract" in result.skip_reason.lower()

    def test_download_with_result_pdf_success(
        self, downloader, mocker, mock_pdf_content
    ):
        """Returns success for PDF download."""
        mock_response = mocker.Mock()
        mock_response.status_code = 200
        mock_response.content = mock_pdf_content
        mock_response.headers = {"content-type": "application/pdf"}

        mocker.patch.object(
            downloader.session, "get", return_value=mock_response
        )
        mocker.patch.object(
            downloader.rate_tracker, "apply_rate_limit", return_value=0
        )
        mocker.patch.object(downloader.rate_tracker, "record_outcome")

        result = downloader.download_with_result(
            "https://arxiv.org/abs/2301.12345"
        )
        assert result.is_success is True
        assert result.content is not None

    def test_download_with_result_pdf_failure(self, downloader, mocker):
        """Returns skip reason for failed PDF download."""
        mock_response = mocker.Mock()
        mock_response.status_code = 404
        mock_response.content = b""
        mock_response.headers = {}

        mocker.patch.object(
            downloader.session, "get", return_value=mock_response
        )
        mocker.patch.object(
            downloader.rate_tracker, "apply_rate_limit", return_value=0
        )
        mocker.patch.object(downloader.rate_tracker, "record_outcome")

        result = downloader.download_with_result(
            "https://arxiv.org/abs/2301.12345"
        )
        assert result.is_success is False
        assert result.skip_reason is not None
        assert "failed to download" in result.skip_reason.lower()


class TestArxivApiIntegration:
    """Tests for arXiv API metadata fetching."""

    @pytest.fixture
    def downloader(self):
        return ArxivDownloader(timeout=30)

    def test_fetch_from_arxiv_api_success(
        self, downloader, mocker, mock_arxiv_api_response
    ):
        """Successfully fetches metadata from arXiv API."""
        mock_response = mocker.Mock()
        mock_response.status_code = 200
        mock_response.text = mock_arxiv_api_response

        mocker.patch.object(
            downloader.session, "get", return_value=mock_response
        )

        metadata = downloader._fetch_from_arxiv_api("2301.12345")
        assert metadata is not None
        assert "Test Paper" in metadata
        assert "John Doe" in metadata

    def test_fetch_from_arxiv_api_failure(self, downloader, mocker):
        """Returns None on API failure."""
        mock_response = mocker.Mock()
        mock_response.status_code = 500

        mocker.patch.object(
            downloader.session, "get", return_value=mock_response
        )

        metadata = downloader._fetch_from_arxiv_api("2301.12345")
        assert metadata is None

    def test_fetch_from_arxiv_api_timeout(self, downloader, mocker):
        """Returns None on timeout."""
        import requests

        mocker.patch.object(
            downloader.session,
            "get",
            side_effect=requests.exceptions.Timeout("Timeout"),
        )

        metadata = downloader._fetch_from_arxiv_api("2301.12345")
        assert metadata is None


class TestArxivTextDownload:
    """Tests for text content download."""

    @pytest.fixture
    def downloader(self):
        return ArxivDownloader(timeout=30)

    def test_download_text_extracts_from_pdf(
        self, downloader, mocker, mock_pdf_content
    ):
        """Downloads PDF and extracts text."""
        # Mock PDF download
        mock_pdf_response = mocker.Mock()
        mock_pdf_response.status_code = 200
        mock_pdf_response.content = mock_pdf_content
        mock_pdf_response.headers = {"content-type": "application/pdf"}

        # Mock API response
        mock_api_response = mocker.Mock()
        mock_api_response.status_code = 200
        mock_api_response.text = """<?xml version="1.0"?>
        <feed xmlns="http://www.w3.org/2005/Atom">
            <entry>
                <title>Test Paper</title>
                <summary>Test abstract</summary>
            </entry>
        </feed>"""

        def mock_get(url, **kwargs):
            if "export.arxiv.org/api" in url:
                return mock_api_response
            return mock_pdf_response

        mocker.patch.object(downloader.session, "get", side_effect=mock_get)
        mocker.patch.object(
            downloader.rate_tracker, "apply_rate_limit", return_value=0
        )
        mocker.patch.object(downloader.rate_tracker, "record_outcome")

        # Note: text extraction from minimal PDF returns None/empty
        result = downloader.download_with_result(
            "https://arxiv.org/abs/2301.12345",
            ContentType.TEXT,
        )
        # The minimal PDF has no text, so this tests the flow even if content extraction fails
        # In production with real PDFs, this would return the extracted text
        assert result is not None
